Hello, Quarto

library(ggplot2)
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks plotly::filter(), stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the cleaned gender-inequality / economic-indicator dataset into `df`.
df <- read.csv(
    file = "data/processed-data/gender_inequality_and_economic_indicators_dataset_clean_final.csv"
)
# Named character vector: full region name -> short abbreviation.
# Passed to scale_x_discrete(labels = ...) to shorten axis tick labels.
region_labels <- setNames(
    c("EAP", "ECA", "HIC", "LAC", "MENA", "SA", "SSA"),
    c(
        "East Asia & Pacific",
        "Europe & Central Asia",
        "High income: OECD",
        "Latin America & Caribbean",
        "Middle East & North Africa",
        "South Asia",
        "Sub-Saharan Africa"
    )
)
# Per-region mean, median and standard deviation of GDP per person employed.
# Missing values are dropped before each statistic is computed.
gdp_summary <- summarise(
    group_by(df, Region),
    Mean_GDP_Score = mean(gdp_per_person_employed, na.rm = TRUE),
    Median_GDP_Score = median(gdp_per_person_employed, na.rm = TRUE),
    SD_GDP_Score = sd(gdp_per_person_employed, na.rm = TRUE)
)

gdp_summary
# A tibble: 7 × 4
  Region                     Mean_GDP_Score Median_GDP_Score SD_GDP_Score
  <chr>                               <dbl>            <dbl>        <dbl>
1 East Asia & Pacific                44012.           33029.       49472.
2 Europe & Central Asia              49127.           48457.       20743.
3 High income: OECD                 114705.          109444.       41572.
4 Latin America & Caribbean          43000.           39516.       23466.
5 Middle East & North Africa         66099.           58770.       33609.
6 South Asia                         25070.           20069.       11615.
7 Sub-Saharan Africa                 18903.           11898.       18469.
# Per-region mean, median and standard deviation of the MOBILITY score.
# The three statistics are applied to the single column via across(); the
# output columns are named <Statistic>_Mobility_Score.
mobility_summary <- df %>%
    group_by(Region) %>%
    summarise(
        across(
            MOBILITY,
            list(
                Mean = ~ mean(.x, na.rm = TRUE),
                Median = ~ median(.x, na.rm = TRUE),
                SD = ~ sd(.x, na.rm = TRUE)
            ),
            .names = "{.fn}_Mobility_Score"
        )
    )

mobility_summary
# A tibble: 7 × 4
  Region             Mean_Mobility_Score Median_Mobility_Score SD_Mobility_Score
  <chr>                            <dbl>                 <dbl>             <dbl>
1 East Asia & Pacif…                89                   100               16.3 
2 Europe & Central …                98.9                 100                5.21
3 High income: OECD                100                   100                0   
4 Latin America & C…                90.6                 100               13.8 
5 Middle East & Nor…                46.2                  37.5             39.1 
6 South Asia                        90.6                 100               18.6 
7 Sub-Saharan Africa                81.8                  87.5             22.3 
## distribution of GDP per person employed
# Stacked histogram (one fill colour per region) using 50,000-wide bins.
# The plotted variable is gdp_per_person_employed, so the title and axis say
# so explicitly rather than just "GDP".
ggplot(df, aes(x = gdp_per_person_employed, fill = Region)) +
    geom_histogram(binwidth = 50000) +
    # Tick marks every 50,000, matching the bin width.
    scale_x_continuous(breaks = seq(0, 10000000, by = 50000)) +
    labs(
        title = "Distribution of GDP per Person Employed by Region",
        x = "GDP per Person Employed",
        y = "Frequency"
    ) +
    theme_minimal()

## distribution of mobility scores
# Stacked histogram (one fill colour per region) using 20-point bins, with
# tick marks every 20 points from 0 to 100.
mobility_p <- ggplot(df, aes(x = MOBILITY, fill = Region)) +
    geom_histogram(binwidth = 20) +
    scale_x_continuous(breaks = seq(0, 100, by = 20)) +
    labs(
        title = "Distribution of Mobility Scores",
        x = "Mobility Score",
        y = "Frequency"
    ) +
    theme_minimal()

# Render the static ggplot as an interactive plotly widget.
ggplotly(mobility_p)
# Box plots of the mobility score, one box per region.
# The x-axis shows region abbreviations (via region_labels), rotated 45 degrees.
ggplot(data = df, mapping = aes(x = Region, y = MOBILITY, fill = Region)) +
    geom_boxplot() +
    scale_x_discrete(labels = region_labels) +
    ggtitle("Mobility Scores by Region") +
    xlab("Region") +
    ylab("Mobility Score") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Keep the region plus the four mobility indicator columns.
mobility_categorical <- select(
    df,
    Region,
    Can.a.woman.choose.where.to.live.in.the.same.way.as.a.man.,
    Can.a.woman.travel.outside.her.home.in.the.same.way.as.a.man.,
    Can.a.woman.apply.for.a.passport.in.the.same.way.as.a.man.,
    Can.a.woman.travel.outside.the.country.in.the.same.way.as.a.man.
)

# Reshape to long form: one row per (Region, Indicator, Response) observation.
mobility_long <- pivot_longer(
    mobility_categorical,
    cols = !Region,
    names_to = "Indicator",
    values_to = "Response"
)

# Count each response per Region x Indicator, then express it as a share of
# that Region x Indicator total. The grouping must still be in place when the
# share is computed: dividing by sum(Count) on the ungrouped table would use
# the grand total of all rows, so the shares within an indicator would not
# sum to 1.
mobility_proportions <- mobility_long %>%
    count(Region, Indicator, Response, name = "Count") %>%
    group_by(Region, Indicator) %>%
    mutate(Proportion = Count / sum(Count)) %>%
    ungroup()

mobility_proportions
# A tibble: 45 × 5
   Region                Indicator                     Response Count Proportion
   <chr>                 <chr>                         <chr>    <int>      <dbl>
 1 East Asia & Pacific   Can.a.woman.apply.for.a.pass… No           6    0.00789
 2 East Asia & Pacific   Can.a.woman.apply.for.a.pass… Yes         19    0.025  
 3 East Asia & Pacific   Can.a.woman.choose.where.to.… No           3    0.00395
 4 East Asia & Pacific   Can.a.woman.choose.where.to.… Yes         22    0.0289 
 5 East Asia & Pacific   Can.a.woman.travel.outside.h… No           2    0.00263
 6 East Asia & Pacific   Can.a.woman.travel.outside.h… Yes         23    0.0303 
 7 East Asia & Pacific   Can.a.woman.travel.outside.t… Yes         25    0.0329 
 8 Europe & Central Asia Can.a.woman.apply.for.a.pass… No           1    0.00132
 9 Europe & Central Asia Can.a.woman.apply.for.a.pass… Yes         22    0.0289 
10 Europe & Central Asia Can.a.woman.choose.where.to.… Yes         23    0.0303 
# ℹ 35 more rows
# Lookup table: indicator column name -> short facet title.
# "Travel outside the country" is international (not intercontinental) travel.
mobility_labels <- c(
    "Can.a.woman.choose.where.to.live.in.the.same.way.as.a.man." = "Choice of Living Situation",
    "Can.a.woman.travel.outside.her.home.in.the.same.way.as.a.man." = "Travel Outside Home",
    "Can.a.woman.apply.for.a.passport.in.the.same.way.as.a.man." = "Passport Acquisition",
    "Can.a.woman.travel.outside.the.country.in.the.same.way.as.a.man." = "International Travel"
)

# Stacked bars of responses per region, one facet per indicator.
# position = "fill" rescales every bar to a total height of 1, so each bar
# shows the response split within that region and indicator.
ggplot(mobility_proportions, aes(x = Region, y = Proportion, fill = Response)) +
    geom_col(position = "fill") +
    facet_wrap(~Indicator, scales = "free", labeller = labeller(Indicator = mobility_labels)) +
    scale_x_discrete(labels = region_labels) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(
        title = "Proportions of Yes/No Responses for Mobility Indicators By Region",
        x = "Region", y = "Proportion"
    )

# Counts of each response to the "choose where to live" indicator, with one
# bar per region placed side by side within every response category.
mob_plt_1 <- ggplot(
    data = df,
    mapping = aes(
        x = Can.a.woman.choose.where.to.live.in.the.same.way.as.a.man.,
        fill = Region
    )
) +
    geom_bar(position = position_dodge()) +
    ggtitle("Can a woman choose where to live in the same way as a man by Region") +
    xlab("Can a woman choose where to live in the same way as a man") +
    ylab("Count") +
    theme_minimal()

# Interactive version of the chart.
ggplotly(mob_plt_1)
# Counts of each response to the "travel outside her home" indicator, with one
# bar per region placed side by side within every response category.
mob_plt_2 <- ggplot(
    data = df,
    mapping = aes(
        x = Can.a.woman.travel.outside.her.home.in.the.same.way.as.a.man.,
        fill = Region
    )
) +
    geom_bar(position = position_dodge()) +
    ggtitle("Can a woman travel outside her home in the same way as a man") +
    xlab("Can a woman travel outside her home in the same way as a man") +
    ylab("Count") +
    theme_minimal()

# Interactive version of the chart.
ggplotly(mob_plt_2)
# Counts of each response to the "apply for a passport" indicator, with one
# bar per region placed side by side within every response category.
mob_plt_3 <- ggplot(df, aes(x = Can.a.woman.apply.for.a.passport.in.the.same.way.as.a.man., fill = Region)) +
    geom_bar(position = "dodge") + # 'dodge' puts bars side by side
    labs(
        title = "Can a woman apply for a passport in the same way as a man",
        x = "Can a woman apply for a passport in the same way as a man",
        y = "Count"
    ) +
    theme_minimal()

# Interactive version of the chart.
ggplotly(mob_plt_3)
# Counts of each response to the "travel outside the country" indicator, with
# one bar per region placed side by side within every response category.
mob_plt_4 <- ggplot(
    data = df,
    mapping = aes(
        x = Can.a.woman.travel.outside.the.country.in.the.same.way.as.a.man.,
        fill = Region
    )
) +
    geom_bar(position = position_dodge()) +
    ggtitle("Can a woman travel outside the country in the same way as a man?") +
    xlab("Can a woman travel outside the country in the same way as a man?") +
    ylab("Count") +
    theme_minimal()

# Interactive version of the chart.
ggplotly(mob_plt_4)
# Welch two-sample t-test: does the mean MOBILITY score differ between the
# response groups of the "choose where to live" indicator?
# NOTE(review): if MOBILITY is itself derived from this indicator, the group
# difference is partly built in; confirm how the score is constructed.
t_test_result <- t.test(
    MOBILITY ~ Can.a.woman.choose.where.to.live.in.the.same.way.as.a.man.,
    data = df
)

print(t_test_result)

    Welch Two Sample t-test

data:  MOBILITY by Can.a.woman.choose.where.to.live.in.the.same.way.as.a.man.
t = -11.11, df = 38.442, p-value = 1.449e-13
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -58.07865 -40.18138
sample estimates:
 mean in group No mean in group Yes 
         46.62162          95.75163 
# Welch two-sample t-test: does the mean MOBILITY score differ between the
# response groups of the "travel outside her home" indicator?
t_test_result <- t.test(
    MOBILITY ~ Can.a.woman.travel.outside.her.home.in.the.same.way.as.a.man.,
    data = df
)

print(t_test_result)

    Welch Two Sample t-test

data:  MOBILITY by Can.a.woman.travel.outside.her.home.in.the.same.way.as.a.man.
t = -12.536, df = 17.37, p-value = 3.921e-10
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -78.48727 -55.90579
sample estimates:
 mean in group No mean in group Yes 
         25.00000          92.19653 
# Welch two-sample t-test: does the mean MOBILITY score differ between the
# response groups of the "apply for a passport" indicator?
t_test_result <- t.test(
    MOBILITY ~ Can.a.woman.apply.for.a.passport.in.the.same.way.as.a.man.,
    data = df
)

print(t_test_result)

    Welch Two Sample t-test

data:  MOBILITY by Can.a.woman.apply.for.a.passport.in.the.same.way.as.a.man.
t = -8.5657, df = 48.039, p-value = 3.098e-11
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -46.41090 -28.76507
sample estimates:
 mean in group No mean in group Yes 
         56.70732          94.29530 
# Welch two-sample t-test: does the mean MOBILITY score differ between the
# response groups of the "travel outside the country" indicator?
t_test_result <- t.test(
    MOBILITY ~ Can.a.woman.travel.outside.the.country.in.the.same.way.as.a.man.,
    data = df
)

print(t_test_result)

    Welch Two Sample t-test

data:  MOBILITY by Can.a.woman.travel.outside.the.country.in.the.same.way.as.a.man.
t = -18.855, df = 10.716, p-value = 1.46e-09
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
 -89.83431 -70.99902
sample estimates:
 mean in group No mean in group Yes 
         10.00000          90.41667 
# Scatter plot of mobility score vs GDP per person employed, overlaid with a
# linear-model fit line drawn in red.
reg_mob <- ggplot(
    data = df,
    mapping = aes(x = MOBILITY, y = gdp_per_person_employed)
) +
    geom_point() +
    geom_smooth(method = "lm", colour = "red") +
    ggtitle("Mobility Score vs GDP per Person Employed") +
    xlab("Mobility Score") +
    ylab("GDP per Person Employed") +
    theme_minimal()
# Interactive version of the chart.
ggplotly(reg_mob)
`geom_smooth()` using formula = 'y ~ x'
# density plot of the mobility score with the mean and median marked
# Compute each reference value once and reuse it for both the line and its
# text label.
mobility_mean <- mean(df$MOBILITY, na.rm = TRUE)
mobility_median <- median(df$MOBILITY, na.rm = TRUE)

ggplot(df, aes(x = MOBILITY)) +
    geom_density(fill = "skyblue", alpha = 0.5, color = "black") +
    # xintercept is passed as a fixed parameter (not inside aes()) so a single
    # line is drawn instead of one identical line per data row.
    geom_vline(xintercept = mobility_mean, color = "red", linetype = "dashed", linewidth = 1) +
    geom_vline(xintercept = mobility_median, color = "blue", linetype = "dashed", linewidth = 1) +
    theme_minimal() +
    labs(title = "Density of Mobility Score", x = "Mobility Score", y = "Density") +
    annotate("text", x = mobility_mean, y = 0.015, label = "Mean", color = "red", angle = 90, vjust = 1.5) +
    annotate("text", x = mobility_median, y = 0.012, label = "Median", color = "blue", angle = 90, vjust = 1.5)

# Overall mean mobility score; NAs are dropped, as in the other summaries.
print(mean(df$MOBILITY, na.rm = TRUE))
[1] 86.18421
# Overall median mobility score; NAs are dropped, as in the other summaries.
print(median(df$MOBILITY, na.rm = TRUE))
[1] 100